# Ant-v2 env
# two objectives
# x-axis speed, y-axis speed

import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
from os import path
from moenvs.MujocoEnvs.constraint_env import CMOEnv

class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, model_path = path.join(path.abspath(path.dirname(__file__)), "assets/ant.xml"), frame_skip = 5)
        utils.EzPickle.__init__(self)
        self.obj_dim = 2
        self.action_space_type = "Continuous"
        self.reward_space = np.zeros((self.obj_dim,))

    def step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        yposbefore = self.get_body_com("torso")[1]
        # if isinstance(a, (np.ndarray)):
        #     a = a[0]
        a = np.clip(a, -1.0, 1.0)
        self.do_simulation(a, self.frame_skip)

        xposafter = self.get_body_com("torso")[0]
        yposafter = self.get_body_com("torso")[1]

        ctrl_cost = .5 * np.square(a).sum()
        survive_reward = 1.0
        other_reward = - ctrl_cost + survive_reward

        vx_reward = (xposafter - xposbefore) / self.dt + other_reward
        vy_reward = (yposafter - yposbefore) / self.dt + other_reward

        state = self.state_vector()
        notdone = np.isfinite(state).all()
        done = not notdone
        ob = self._get_obs()

        x_velocity, y_velocity = (xposafter - xposbefore) / self.dt, (yposafter - yposbefore) / self.dt
        return ob, np.array([vx_reward, vy_reward],dtype=np.float32), done, {'x_velocity': x_velocity, 'y_velocity': y_velocity} 
    
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[2:],
            self.sim.data.qvel.flat,
        ])

    def reset_model(self):
        # qpos = self.init_qpos + self.np_random.uniform(
        #     size=self.model.nq, low=-0.1, high=0.1
        # )
        # qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
        # self.set_state(qpos, qvel)
        c = 1e-3
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + c * self.np_random.standard_normal(self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5


class CMOAntEnv(AntEnv, CMOEnv):
    def __init__(self):
        CMOEnv.__init__(self, min_episode_cost=0.0, max_episode_cost=250.0, velocity_threshold=2.5745) 
        AntEnv.__init__(self,)
        self.obj_dim = 3
        self.reward_space = np.zeros((self.obj_dim,))

    def step(self, a):
        ob, rew, done, info = super().step(a)
        x_velocity, y_velocity = info['x_velocity'], info['y_velocity']
        velocity = np.sqrt(x_velocity**2 + y_velocity**2)
        cost = float(velocity > self.velocity_threshold)
        rew_vec = np.append(rew, cost)
        rew = rew_vec if self.linear_preference is None else np.dot(rew_vec[:len(self.linear_preference)], self.linear_preference)
        info.update({'cost': cost, 'rew_vec': rew_vec[:-1]})
        done = not self.is_healthy
        return ob, rew, done, info
    
    @property
    def is_healthy(self):
        state = self.state_vector()
        is_healthy = np.isfinite(state).all() and 0.2 <= state[2] <= 1.0
        return is_healthy